In [1]:
import pandas as pd
from collections import OrderedDict
from bokeh.charts import Bar
from bokeh.plotting import output_notebook, show
from bokeh.palettes import brewer
from bokeh.models import LinearAxis, Range1d


BokehJS successfully loaded.

In [24]:
output_notebook()

In [2]:
import targetsmosh as tm

In [20]:
reload(tm)


Out[20]:
<module 'targetsmosh' from 'targetsmosh.py'>

In [ ]:
vcounts2013 = tm.get_vcounts(p, 2013)
tm.summary(vcounts2013, p)

In [4]:
p = pd.read_pickle('../CDPdata/sheet12_2014.pkl')

In [4]:
scopes12 = pd.read_pickle("../CDPdata/2010to2014scopes12.pkl")
scopes12_5 = pd.read_pickle("../CDPdata/2010to2014scopes12fiveyrs.pkl")
companies = scopes12.index.levels[0].tolist()
companies5 = scopes12_5.index.levels[0].tolist()

In [3]:
s12_c = pd.read_pickle("../CDPdata/s12_completed.pkl")
s12cos = s12_c.index.levels[0].tolist()

In [11]:
targets_yr={}
for yr in range(2010,2015):
    p = pd.read_pickle("../CDPdata/sheet"+str(tm.deets[yr]["summary"]["sheet"])+"_" + str(yr) + ".pkl")
    targets_yr[yr] = tm.get_targets(p, yr)
    targets_yr[yr]["year"] = yr

clean up 2010 manually


In [12]:
# oh i need to clean up 2010 for realz
counts2010 = targets_yr[2010]["Organisation"].value_counts()
multiples2010 = counts2010[counts2010>1].index.tolist()
# targets_yr[2010]["target type"].value_counts()

In [13]:
targets_yr[2010].drop_duplicates("Organisation",inplace=True)

In [14]:
len(multiples2010)
targets_yr[2010] = targets_yr[2010][targets_yr[2010]["Organisation"].isin(s12cos)]
targets_yr[2010].to_pickle("../CDPdata/2010targets_cleaned.pkl")

combine them all


In [15]:
alltargets = pd.concat(targets_yr.values())
targets = alltargets[alltargets["Organisation"].isin(s12cos)]
len(targets) # 7861


Out[15]:
7861

In [16]:
targets.to_pickle("../CDPdata/targets_all.pkl")
len(targets.index.value_counts().index) # 2090


Out[16]:
2090

In [27]:
targets.head()


Out[27]:
Organisation target type has absolute has intensity
year
2010 Actelios SpA NaN False False
2010 Bharat Petroleum Corporation NaN False False
2010 EDP - Energias de Portugal S.A. Other: GHG Emission Reduction False False
2010 Ernst & Young LLP UK Other: Proportion of total electricity which i... False False
2010 Fortune Fashions Industries NaN False False

graph target setting


In [18]:
targets.set_index("year", inplace=True)

In [35]:
# targets.reset_index(inplace=True)
targets["abs count"] = 1
targets["int count"] = 1
target_yrgs = targets.groupby("year")
# target_yrgs.describe()


-c:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
-c:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

In [37]:
t_sums = target_yrgs.sum()
t_sums["abs percent"] = t_sums["has absolute"]/t_sums["abs count"]
t_sums["int percent"] = t_sums["has intensity"]/t_sums["int count"]

In [38]:
t_sums


Out[38]:
level_0 index has absolute has intensity abs count int count abs percent int percent
year
2010 901153 901153 411 325 1343 1343 0.306031 0.241996
2011 3056738 3056738 608 624 1471 1471 0.413324 0.424201
2012 5936685 5936685 678 756 1635 1635 0.414679 0.462385
2013 8884680 8884680 749 832 1680 1680 0.445833 0.495238
2014 12114474 12114474 786 919 1732 1732 0.453811 0.530600

In [20]:
valuest_summary = OrderedDict()
valuest_summary["% With Absolute Targets"] = abs_percents
valuest_summary["% With Intensity Targets"] = int_percents

In [21]:
years = []
for yr in range(2010,2015):
    years.append(str(yr))

In [22]:
colors = brewer["Spectral"][3]
bar = Bar(valuest_summary, years, title="Targets Set for Companies That Reported 5 Years", filename="scope12_disclosure.html",
           stacked=False, ylabel="% of Companies",
           xlabel="Year", legend ="bottom_right")

In [25]:
show(bar)


make targets charts by country and sector


In [6]:
targets5 = pd.read_pickle("../CDPdata/2010to2014targets5yrs.pkl")
orginfos = pd.read_pickle("../CDPdata/orginfos.pkl").reset_index().set_index("Organisation")

In [7]:
# combine with orginfos in order to get sector and country numbers
targets5= targets5.reset_index().set_index(["Organisation"])

In [8]:
len(set(targets5.index)) # 654


Out[8]:
654

In [9]:
targets5 = targets5.join(orginfos[["Country", "GICS Sector"]])
targets5["GICS Sector"].replace("Banks", "Financials", inplace=True)
targets5 = targets5.reset_index().set_index("Organisation").sort_index()

In [10]:
targets5["has absolute"] = targets5["has absolute"].apply(lambda(x): int(x))
targets5["has intensity"] = targets5["has intensity"].apply(lambda(x): int(x))

In [11]:
targets5_c = targets5.groupby(["year","Country"])
targets5_s = targets5.groupby(["year","GICS Sector"])
targets5_csums = targets5_c.sum()
targets5_ssums = targets5_s.sum()

In [12]:
scopes12_5["GICS Sector"].replace("Banks", "Financials", inplace=True)
scopes12_5 = scopes12_5.reset_index().set_index("Organisation").sort_index()

In [13]:
scopes12_5cd = scopes12_5.groupby(["year","Country"]).describe().reset_index()
scopes12_5sd = scopes12_5.groupby(["year","GICS Sector"]).describe().reset_index()

In [15]:
scopes12_5sdis = scopes12_5sd[scopes12_5sd["level_2"]=="count"][["year", "GICS Sector","scope1", "scope2"]]
scopes12_5sdis.set_index(["year", "GICS Sector"],inplace=True)
scopes12_5cdis = scopes12_5cd[scopes12_5cd["level_2"]=="count"][["year", "Country","scope1", "scope2"]]
scopes12_5cdis.set_index(["year", "Country"],inplace=True)

In [16]:
scopes12_5cdis.head()
targets5_csums.head()
# targets5_csums.loc[2010,"USA"]


Out[16]:
index has absolute has intensity
year Country
2010 Argentina 192 0 0
Australia 5804 1 0
Austria 213 1 1
Belgium 4631 3 1
Bermuda 90 0 1

In [17]:
def mergest(s, t):
    st = {}
    for yr in range(2010, 2015):
        st[yr] = s.loc[yr][["scope1","scope2"]]
        tyr = t.loc[yr]
        st[yr] = st[yr].join(tyr[["has absolute", "has intensity"]])
        st[yr].fillna(0,inplace=True)
        st[yr]["year"] = yr
        st[yr].sort("scope1", ascending = 0, inplace=True)
    sts = pd.concat(st.values())
    sts["abspercent"]  = sts["has absolute"]/sts["scope1"]*100.0
    sts["intpercent"]  = sts["has intensity"]/sts["scope1"]*100.0
    return sts

In [26]:
scopestargets_c = mergest(scopes12_5cdis, targets5_csums).reset_index().set_index(["year", "Country"]).sort_index()
scopestargets_s = mergest(scopes12_5sdis, targets5_ssums).reset_index().set_index(["year", "GICS Sector"]).sort_index()

In [56]:
sectors = scopestargets_s.index.levels[1].tolist()
stvalues_s = OrderedDict()
for i in scopestargets_s.index.levels[0]:
    stvalues_s[str(i)] = scopestargets_s.loc[i]["intpercent"].values

In [72]:
stvalues_c = OrderedDict()
for i in scopestargets_c.index.levels[0]:
    yrvalues = []
    for c in countries:
        yrvalues.append(scopestargets_c.loc[i,c]["intpercent"]) 
    stvalues_c[str(i)] = yrvalues

In [69]:
countries = scopestargets_c.loc[2010].sort("scope1", ascending =0).index.tolist()[0:9]
# scopestargets_s.loc["Consumer Discretionary"]["abspercent"].values

In [75]:
#bar = Bar(stvalues_s, sectors, title="Intensity Targets by Sector", filename="scope12_disclosure.html",
#           stacked=False, ylabel="% of Companies", xlabel="Sector", legend ="top_right")
bar = Bar(stvalues_c, countries, title="Intensity Targets by Country", filename="scope12_disclosure.html",
           stacked=False, ylabel="% of Companies", xlabel="Countries", legend ="top_right")
bar.y_range.end = 100

In [76]:
barci = bar
show(bar)


get target details


In [29]:
xls = pd.ExcelFile('../CDPdata/Investor CDP 2013_Public Data.xlsx')

In [36]:
xls2012 = pd.ExcelFile('../CDPdata/Investor CDP 2012_Public Data.xlsx')

In [52]:
p = xls2012.parse(14)
p2 = xls2012.parse(16)

In [56]:
p.to_pickle('../CDPdata/2012sheet14.pkl')
p2.to_pickle('../CDPdata/2012sheet16.pkl')

In [55]:
# 2014
p.columns[12:20]


Out[55]:
Index([u'3.1d. Please provide details on your progress against this target made in the reporting year\n\n3.1d. ID', u'3.1d. % complete (time)', u'3.1d. % complete (emissions)', u'3.1d. Comment'], dtype='object')

In [24]:
p2[p2.columns[15]].value_counts()[0:10]

In [16]:
p2[p2.columns[18]].value_counts()


Out[16]:
metric tonnes CO2e per unit revenue                             228
metric tonnes CO2e per FTE employee                             168
metric tonnes CO2e per metric tonne of product                  135
metric tonnes CO2e per unit of production                       112
metric tonnes CO2e per square meter                             108
metric tonnes CO2e per megawatt hour (MWh)                       42
grams CO2 per kilometer                                          23
metric tonnes CO2e per unit hour worked                          22
metric tonnes CO2e per square foot                               22
metric tonnes CO2e per unit of service provided                  17
metric tonnes CO2e per vehicle produced                          11
Other: metric tonnes of fugitive CO2e/tonnes CH4 produced by landfill     10
metric tonnes CO2e per kilometer                                  7
metric tonnes CO2e per barrel of oil equivalent (BOE)             7
Other:                                                            7
...
Other: U.S. tons CO2 emitted per MWh                            1
Other: metric tonnes CO2e per million dollars in sales          1
Other: tOther: tonnes of CO2e per tonnes of clinker             1
Other: kilograms of CO2e per thousand gross-ton-mile            1
Other: Crude oil equivalent KL /Net Sales                       1
Other: metric tonnes CO2e per unit GP                           1
Other: 実質生産高原単位指数改善率                                            1
Other: metric tonnes CO2e per unit shipped                      1
Other: mt CO2e/GC, converted from actual goal metric of restaurant kWh per number of sales transactions, or "guest counts" (GC)    1
Other: kg CO2/ton clinker                                       1
Other: grams CO2e per revenue ton mile                          1
Other: metric tons of CO2e avoided per tons of CO2e emitted     1
Other: CO2 per transferred mobile data (kg CO2/GB)              1
Other: tonnes CO2(e) relative to the value of net written premiums    1
Other: ATM1台の排出量                                                1
Length: 427, dtype: int64

In [ ]: